** Estimate the pass-through with different assumptions about spillovers using data from the ACS, using poor counties 
** Generate relevant variables: Weighted share of wages earned by MW affected workers 
** JHL

*************************************
** Set up workspace
*************************************
version 14.0
clear all
set more off

cd "${path_home}"
adopath + ../programs

** log using "${path_log}/c07_acs_mw_shares_0615_06q1", text replace

*************************************
** Start work here
*************************************

timer clear
timer on 1 

*************************************
** [1] Clean the data 
*************************************

** 2006-2015 
** local y: environment SLURM_ARRAY_TASK_ID 

foreach y of numlist 2006/2015 {

	use "${path_big_dta}/acs/acs_wages_demographics_0615" if year == `y', clear

	** Merge with MW annual data
	gen statefips = statefip 
	merge m:1 statefips year using "${path_big_dta}/mw/VZ/VZ_state_stata/VZ_state_annual.dta", keep(match) nogen 
	
	
	replace incwage = . if incwage == 999999
	replace hhincome = . if hhincome == 9999999
	ta wkswork2 
	* Convert weeks worked bracket to continuous using mid-points (52 for 50-52 since CPS MORG suggested) 
	gen wkswork2_cont = 7 if wkswork2 == 1
		replace wkswork2_cont = 20 if wkswork2 == 2 
		replace wkswork2_cont = 33 if wkswork2 == 3 
		replace wkswork2_cont = 43.5 if wkswork2 == 4 
		replace wkswork2_cont = 48.5 if wkswork2 == 5 
		replace wkswork2_cont = 52 if wkswork2 == 6 
		
	gen hourw = incwage/(uhrswork*wkswork2_cont)
	
	gen mw = mean_mw 
	
	gen mw_over_hourw = mean_mw / hourw 
	
	** SAMPLE SELECTION, criteria from AMS (2016) and some standard procedures ** 
	** Keep positive wages 
	keep if hourw > 0
	keep if age>=18&age<=64
	** Drop self-employed and unpaid family workers
	drop if classwkrd==13|classwkrd==14|classwkrd==29

	gen fips_state_code = statefip
	gen fips_county_code = countyfips
	** Only 522 fips because most don't show county code, only state code 
	merge m:1 fips_state_code fips_county_code using "${path_big_dta}/qcew/qcew_county_wage_q24_0415", keep(match master) nogen 

	gen mw_worker = (hourw <= mean_mw) 

		* Share of wages earned by MW workers by Kaitz index, quartiles
		foreach q of numlist 1/4 {
			foreach i of numlist 0/1 {
				cap drop mw_earned_`i'_q`q'_4q 
				cap gen mw_earned_`i'_q`q'_4q = . 
				cap total incwage [ pweight = perwt ] if mw_worker == `i' & kaitz_y2006_qtr1_q4_c == `q' 
				cap mat b=e(b)
				cap replace mw_earned_`i'_q`q'_4q = b[1,1]
				
			}
			
			* Share of wages earned by MW workers 
			cap gen mwshare_nat_q`q'_4q_`y'= (mw_earned_1_q`q'_4q)/(mw_earned_0_q`q'_4q + mw_earned_1_q`q'_4q)
		}
		
		* Share of wages earned by MW workers by Kaitz index, above below median  
		foreach q of numlist 1/2 {
			foreach i of numlist 0/1 {
				cap drop mw_earned_`i'_q`q'_2q 
				cap gen mw_earned_`i'_q`q'_2q  = . 
				cap total incwage [ pweight = perwt ] if mw_worker == `i' & kaitz_y2006_qtr1_q2_c == `q' 
				cap mat b=e(b)
				cap replace mw_earned_`i'_q`q'_2q  = b[1,1]
				
			}
			
			* Share of wages earned by MW workers 
			cap gen mwshare_nat_q`q'_2q_`y'  = (mw_earned_1_q`q'_2q )/(mw_earned_0_q`q'_2q  + mw_earned_1_q`q'_2q )
		}
	
		* Share of wages earned by MW workers 
		foreach i of numlist 0/1 {
			cap drop mw_earned_`i'
			cap gen mw_earned_`i'  = . 
			cap total incwage [ pweight = perwt ] if mw_worker == `i'
			cap mat b=e(b)
			cap replace mw_earned_`i'  = b[1,1]
			
		}
		
		cap gen mwshare_nat_`y'  = (mw_earned_1)/(mw_earned_0  + mw_earned_1 )
	
	** Share of wages earned by MW workers, by industry, poor counties, rich counties, and all counties 
	foreach i in 4451 44611 45211 722Z {
		* Total earnings
			* Poor counties 
		foreach n of numlist 0/1 {
			cap drop mw_earned_`n'_i`i'_poor_`y' 
			cap total incwage [ pweight = perwt ] if mw_worker == `n' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4) & indnaics == "`i'"
			cap mat b=e(b)
			cap gen mw_earned_`n'_i`i'_poor_`y' = b[1,1]
		}
		
			* Rich counties 
		foreach n of numlist 0/1 {
			cap drop mw_earned_`n'_i`i'_rich_`y' 
			cap total incwage [ pweight = perwt ] if mw_worker == `n' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2) & indnaics == "`i'"
			cap mat b=e(b)
			cap gen mw_earned_`n'_i`i'_rich_`y' = b[1,1]
		}
		
		* Share of wages earned by MW workers
			* Poor counties
		cap drop mwshare_i`i'_poor_`y'
		cap gen mwshare_i`i'_poor_`y' = (mw_earned_1_i`i'_poor_`y')/(mw_earned_0_i`i'_poor_`y' + mw_earned_1_i`i'_poor_`y')
		cap drop mw_earned_0_i`i'_poor_`y' mw_earned_1_i`i'_poor_`y' 
		su mwshare_i`i'_poor_`y' 
		
			* Rich counties
		cap drop mwshare_i`i'_rich_`y'
		cap gen mwshare_i`i'_rich_`y' = (mw_earned_1_i`i'_rich_`y')/(mw_earned_0_i`i'_rich_`y' + mw_earned_1_i`i'_rich_`y')
		cap drop mw_earned_0_i`i'_rich_`y' mw_earned_1_i`i'_rich_`y' 
		su mwshare_i`i'_rich_`y' 	
		
		* Overall total earnings
		foreach n of numlist 0/1 {
			cap drop mw_earned_`n'_i`i'_`y'
			cap total incwage [ pweight = perwt ] if mw_worker == `n' & indnaics == "`i'"
			cap mat b=e(b)
			cap gen mw_earned_`n'_i`i'_`y' = b[1,1]
		}
		
		* Share of wages earned by MW workers
		cap drop mwshare_i`i'_`y'
		cap gen mwshare_i`i'_`y' = (mw_earned_1_i`i'_`y')/(mw_earned_0_i`i'_`y' + mw_earned_1_i`i'_`y')
		cap drop mw_earned_0_i`i'_`y' mw_earned_1_i`i'_`y'
		* 
		su mwshare_i`i'_`y' 
	}
	
		** Start with the national distribution, percentiles and proportion earned for each percentile 
		** Note: I only calculate the percentile for those at least at the minimum wage, since the ACS has a lot of part time folks with measurement error in hours
		_pctile hourw [w=perwt] if year==`y' & hourw >= mw, nq(100)
		foreach p of numlist 1/99 {
			local p`p' `r(r`p')'
		}
		di `y'
		cap noi gen nat_pct_`y'=.
		cap noi gen nat_tot_`y'=. 
		foreach p of numlist 1/99 {
			cap qui replace nat_pct_`y'=`p`p'' in `p'
			* Use 99th percentile (or winsorize) vs. total (outliers) 
			cap qui total incwage [pweight=perwt] if year==`y'&hourw<=`p`p''
			cap mat b=e(b)
			cap qui replace nat_tot_`y'=b[1,1] in `p' 
			
		}
		
			* 100th percentile 
			su hourw [w=perwt] if year==`y'
			cap qui replace nat_pct_`y'=`r(max)' in 100
			cap qui total incwage [pweight=perwt] if year==`y' 
			cap replace nat_tot_`y'=b[1,1] in 100 
		
		gen fips = fips_state_code*1000 + fips_county_code

		** Calculate labor cost shares by percentile and by industry 
		** For each industry, calculate their within-sector percentile and proportion using national percentiles
		foreach i in 4451 44611 45211 722Z {
			di "`i' All"
			* Number of distinct households
			distinct serial if indnaics=="`i'" & year==`y'
			cap noi gen i`i'_nat_count_`y' = `r(ndistinct)'			
			* National or within-sector distribution 
			_pctile hourw [ w = perwt ] if indnaics=="`i'" & year==`y' , nq(100)
			foreach p of numlist 1/99 {
				local i`i'p`p' `r(r`p')'
			}
			di `y'
			cap noi gen i`i'_pct_`y'=.
			cap noi gen i`i'_tot_`y'=. 
			
			foreach p of numlist 1/99 {
				cap qui replace i`i'_pct_`y'=`i`i'p`p'' in `p' 
				* Using national percentiles
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y'&hourw<=`p`p'' 
				cap mat b=e(b)
				cap qui replace i`i'_tot_`y'=b[1,1] in `p' 
				
			}
				cap su hourw [w=perwt] if indnaics=="`i'"&year==`y' 
				cap qui replace i`i'_pct_`y'=`r(max)' in 100
				
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y' 
				cap mat b=e(b)
				cap replace i`i'_tot_`y'=b[1,1] in 100 
		}
	
		** Use rich counties only, get their cost shares by percentile by industry ** 
		** Note: I only calculate the percentile for those at least at the minimum wage, since the ACS has a lot of part time workers with potential measurement error in hours
		_pctile hourw [w=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2) & hourw >= mw, nq(100)
		foreach p of numlist 1/99 {
			local rich_p`p' `r(r`p')'
		}
		di `y'
		cap noi gen rich_pct_`y'=.
		cap noi gen rich_tot_`y'=. 
		foreach p of numlist 1/99 {
			cap qui replace rich_pct_`y'=`rich_p`p'' in `p'
			** Use 99th percentile (or winsorize) vs. total (outliers) 
			cap qui total incwage [pweight=perwt] if year==`y'&hourw<=`rich_p`p'' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
			cap mat b=e(b)
			cap qui replace rich_tot_`y'=b[1,1] in `p' 
			
		}
		
			** 100th percentile 
			su hourw [w=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
			cap qui replace rich_pct_`y'=`r(max)' in 100
			cap qui total incwage [pweight=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
			cap replace rich_tot_`y'=b[1,1] in 100 
			
		** For each industry, calculate their within-sector percentile and proportion using both national and within percentiles
		foreach i in 4451 44611 45211 722Z {
			di "`i' Rich" 
			* Number of distinct households
			distinct serial if indnaics=="`i'" & year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
			cap noi gen i`i'_rich_count_`y' = `r(ndistinct)'
			* National distribution
			_pctile hourw [ w = perwt ] if indnaics=="`i'" & year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2) , nq(100)
			foreach p of numlist 1/99 {
				local i`i'p`p' `r(r`p')'
			}
			di `y'
			cap noi gen i`i'_rich_pct_`y'=.
			cap noi gen i`i'_rich_tot_`y'=. 

			foreach p of numlist 1/99 {
				cap qui replace i`i'_rich_pct_`y'=`i`i'p`p'' in `p' 
				* Using national percentiles
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y'&hourw<=`p`p'' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
				cap mat b=e(b)
				cap qui replace i`i'_rich_tot_`y'=b[1,1] in `p' 
				
			}
				cap su hourw [w=perwt] if indnaics=="`i'"&year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
				cap qui replace i`i'_rich_pct_`y'=`r(max)' in 100
				
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y' & (kaitz_y2006_qtr1_q4_c == 1 | kaitz_y2006_qtr1_q4_c == 2)
				cap mat b=e(b)
				cap replace i`i'_rich_tot_`y'=b[1,1] in 100 
		}
		
		** Use poor counties only, get their cost shares by percentile by industry ** 
		** Note: I only calculate the percentile for those at least at the minimum wage, since the ACS has a lot of part time workers with potential measurement error in hours
		_pctile hourw [w=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4) & hourw >= mw, nq(100)
		foreach p of numlist 1/99 {
			local poor_p`p' `r(r`p')'
		}
		di `y'
		cap noi gen poor_pct_`y'=.
		cap noi gen poor_tot_`y'=. 
		foreach p of numlist 1/99 {
			cap qui replace poor_pct_`y'=`poor_p`p'' in `p'
			* Use 99th percentile (or winsorize) vs. total (outliers) 
			cap qui total incwage [pweight=perwt] if year==`y'&hourw<=`poor_p`p'' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
			cap mat b=e(b)
			cap qui replace poor_tot_`y'=b[1,1] in `p' 
			
		}
		
			** 100th percentile 
			su hourw [w=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
			cap qui replace poor_pct_`y'=`r(max)' in 100
			cap qui total incwage [pweight=perwt] if year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
			cap replace poor_tot_`y'=b[1,1] in 100 
			
		** For each industry, calculate their within-sector percentile and proportion using national percentiles
		foreach i in 4451 44611 45211 722Z {
			di "`i' Poor"
			* Number of distinct households
			distinct serial if indnaics=="`i'" & year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
			cap noi gen i`i'_poor_count_`y' = `r(ndistinct)'
			* National distribution
			_pctile hourw [ w = perwt ] if indnaics=="`i'" & year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4) , nq(100)
			foreach p of numlist 1/99 {
				local i`i'p`p' `r(r`p')'
			}
			di `y'
			cap noi gen i`i'_poor_pct_`y'=.
			cap noi gen i`i'_poor_tot_`y'=. 

			foreach p of numlist 1/99 {
				cap qui replace i`i'_poor_pct_`y'=`i`i'p`p'' in `p' 
				* Using national percentiles
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y'&hourw<=`p`p'' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
				cap mat b=e(b)
				cap qui replace i`i'_poor_tot_`y'=b[1,1] in `p' 
				
			}
				cap su hourw [w=perwt] if indnaics=="`i'"&year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
				cap qui replace i`i'_poor_pct_`y'=`r(max)' in 100
				
				cap qui total incwage [pweight=perwt] if indnaics=="`i'"&year==`y' & (kaitz_y2006_qtr1_q4_c == 3 | kaitz_y2006_qtr1_q4_c == 4)
				cap mat b=e(b)
				cap replace i`i'_poor_tot_`y'=b[1,1] in 100 
		}
	
	keep mwshare_nat_q1_2q_`y' mwshare_nat_q2_2q_`y' mwshare_i4451_poor_`y'-mwshare_i722Z_`y' nat_pct_`y'-i722Z_poor_tot_`y'

	keep in 1/100
	gen id=_n

	save "${path_dta}/acs/mwshares_`y'", replace

}

*************************************
** Close workspace
*************************************
timer off 1
timer list 1
** log close